package opt_web_crawlers

import (
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"github.com/bwmarrin/snowflake"
	"gorm.io/driver/postgres"
	"gorm.io/gorm"
	"gorm.io/gorm/logger"
	"gorm.io/gorm/schema"
	"io"
	"log"
	"net/http"
	"os"
	"strings"
	"time"
)

// check_err aborts the whole process via log.Fatalln when err is non-nil.
// It is a convenience guard for this crawler's "die on any failure" style;
// on nil it is a no-op.
func check_err(err error) {
	if err == nil {
		return
	}
	log.Fatalln("catch a err ", err)
}
// Postgre_conn opens the crawler's PostgreSQL database, runs auto-migration
// for the Crawlers table, and returns the live *gorm.DB handle.
// Any connection or migration failure aborts the process via check_err.
func Postgre_conn() *gorm.DB {

	// Verbose SQL logger to stdout; slow queries (>1s) are highlighted and
	// parameters are redacted (ParameterizedQueries) so values never hit logs.
	// Renamed from `logger` to avoid shadowing the imported logger package.
	gormLogger := logger.New(log.New(os.Stdout, "\r\n", log.LstdFlags), logger.Config{
		SlowThreshold:             time.Second,
		LogLevel:                  logger.Info,
		IgnoreRecordNotFoundError: true,
		ParameterizedQueries:      true,
		Colorful:                  false,
	})
	// NOTE(review): credentials are hard-coded in source — move host/user/
	// password into environment variables or a config file.
	dsn := "host=gz.miaomiaole.xyz user=root password=10086 dbname=initdb port=7432 sslmode=disable TimeZone=Asia/Shanghai"
	db, err := gorm.Open(postgres.Open(dsn),
		&gorm.Config{
			Logger: gormLogger,
			NamingStrategy: schema.NamingStrategy{
				TablePrefix:   "opt_", // tables become opt_crawlers etc.
				SingularTable: false,
			},
		})
	check_err(err)
	// BUG FIX: AutoMigrate's error was silently discarded.
	check_err(db.AutoMigrate(&Crawlers{}))
	return db
}

// http_client performs a GET request against url with a desktop-Chrome
// User-Agent and returns the response. Any failure — bad URL, transport
// error, or non-200 status — aborts the process via log.Fatalf, so a
// non-nil response is guaranteed to the caller.
// The caller is responsible for closing the returned response body.
func http_client(url string) *http.Response {
	request, err := http.NewRequest("GET", url, nil)
	// BUG FIX: both errors below were discarded; a transport failure left
	// res nil and the StatusCode read below nil-dereferenced.
	if err != nil {
		log.Fatalf("build request for %s: %v", url, err)
	}
	request.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36")

	// Bounded timeout: http.DefaultClient has none and can hang forever on
	// a stalled connection.
	client := &http.Client{Timeout: 30 * time.Second}
	res, err := client.Do(request)
	if err != nil {
		log.Fatalf("request %s: %v", url, err)
	}
	if res.StatusCode != 200 {
		log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
	}
	return res
}

// crawlers_baidu fetches the Baidu homepage, extracts the page title, the
// whitespace-stripped text, and the src of every <script> tag, then stores
// one Crawlers row in PostgreSQL and prints the number of rows inserted.
func crawlers_baidu() {
	url := "https://www.baidu.com"
	res := http_client(url)
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	check_err(err)

	// Collect the src attribute of every external <script> tag.
	var scriptSrcs []string
	doc.Find("script").Each(func(_ int, s *goquery.Selection) {
		if src, ok := s.Attr("src"); ok {
			scriptSrcs = append(scriptSrcs, src)
		}
	})

	crawlers := &Crawlers{
		Url:     url,
		Name:    "github.com", // NOTE(review): name does not match the crawled URL — confirm intended
		Title:   doc.Find("title").Text(),
		Html:    strings.ReplaceAll(doc.Text(), " ", ""),
		Content: strings.Join(scriptSrcs, ","),
		Types:   "html",
	}

	conn := Postgre_conn()
	// BUG FIX: RowsAffected lives on the *gorm.DB returned by Create, not on
	// the base connection — the original always printed 0. The insert error
	// was also silently ignored.
	result := conn.Create(crawlers)
	check_err(result.Error)
	fmt.Println(result.RowsAffected)
}

// crawlers_imgs scrapes a Zhihu article page, feeds every <img> src through
// a channel to a downloader goroutine, and waits until every queued image
// has been fetched and dumped to stdout before returning.
func crawlers_imgs() {
	urlChan := make(chan string, 10)
	done := make(chan struct{}) // closed when the consumer has drained urlChan

	// Consumer: download each image URL and print its raw bytes.
	go func() {
		defer close(done)
		for imgUrl := range urlChan {
			imgs := http_client(imgUrl)
			all, err := io.ReadAll(imgs.Body)
			imgs.Body.Close() // BUG FIX: bodies were never closed, leaking connections
			check_err(err)
			println(string(all))
		}
	}()

	client := http_client("https://zhuanlan.zhihu.com/p/419646304")
	defer client.Body.Close()
	rootDom, err := goquery.NewDocumentFromReader(client.Body)
	check_err(err)

	rootDom.Find("img").Each(func(_ int, selection *goquery.Selection) {
		if val, ok := selection.Attr("src"); ok {
			urlChan <- val
		}
	})

	// BUG FIX: the original closed urlChan inside `if i < 0`, a condition
	// that can never hold — the channel was never closed, the consumer
	// goroutine leaked, and the function returned before downloads finished.
	close(urlChan)
	<-done
}

// Crawlers is one crawled-page record persisted by GORM (table "opt_crawlers"
// under the naming strategy in Postgre_conn). It embeds LocalModel for the
// ID/timestamp columns and the snowflake-ID BeforeCreate hook.
type Crawlers struct {
	LocalModel
	Url     string // page address that was fetched
	Name    string // human-readable label for the crawl target
	Title   string // contents of the page's <title> tag
	Html    string // page text with all spaces stripped
	Content string // comma-joined attribute values harvested from the page
	Types   string // record kind, e.g. "html"
}

// LocalModel is a defined type over gorm.Model: it carries the same fields
// (ID, CreatedAt, UpdatedAt, DeletedAt) so it can be overridden locally —
// here to attach the snowflake-ID BeforeCreate hook below.
type LocalModel gorm.Model

// BeforeCreate is a GORM hook that assigns a snowflake-generated primary key
// when the record has no ID yet. A node-creation failure aborts the insert
// by returning the error to GORM.
func (m *LocalModel) BeforeCreate(tx *gorm.DB) error {
	// BUG FIX: the NewNode error was discarded; on failure `node` is nil and
	// node.Generate() below would nil-dereference. Receiver renamed from
	// `this` per Go convention.
	node, err := snowflake.NewNode(1)
	if err != nil {
		return err
	}
	if m.ID == 0 {
		// NOTE(review): snowflake IDs are int64; uint truncates them on
		// 32-bit platforms — confirm the deployment target is 64-bit.
		m.ID = uint(node.Generate())
	}
	return nil
}
